/* 
   Copyright (C) 2020 Captain4LK (Lukas Holzbeierlein)

   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

   3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
Based on gl-matrix: https://github.com/toji/gl-matrix 

Original license notice:

Copyright (c) 2015-2020, Brandon Jones, Colin MacKenzie IV.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "../include/ULK_matrix.h"

#define EPSILON 0.000001f
#define MAX(a,b) (a>b?a:b)
#define MIN(a,b) (a<b?a:b)

void ULK_matrix_2x2_add(ULK_matrix_2x2 out, const ULK_matrix_2x2 a, const ULK_matrix_2x2 b)
{
   out[0] = a[0]+b[0];
   out[1] = a[1]+b[1];
   out[2] = a[2]+b[2];
   out[3] = a[3]+b[3];
}

void ULK_matrix_2x2_adjoint(ULK_matrix_2x2 out, const ULK_matrix_2x2 in)
{
   float in0 = in[0];
   
   out[0] = in[3];
   out[1] = -in[1];
   out[2] = -in[2];
   out[3] = in0;
}

ULK_matrix_2x2 *ULK_matrix_2x2_clone(const ULK_matrix_2x2 in)
{
   ULK_matrix_2x2 *out = malloc(sizeof(ULK_matrix_2x2));
   
   *out[0] = in[0];
   *out[1] = in[1];
   *out[2] = in[2];
   *out[3] = in[3];
   
   return out;
}

void ULK_matrix_2x2_copy(ULK_matrix_2x2 dst, const ULK_matrix_2x2 src)
{
   dst[0] = src[0];
   dst[1] = src[1];
   dst[2] = src[2];
   dst[3] = src[3];
}

ULK_matrix_2x2 *ULK_matrix_2x2_create()
{
   ULK_matrix_2x2 *out = malloc(sizeof(ULK_matrix_2x2));
   
   *out[0] = 1.0f;
   *out[1] = 0.0f;
   *out[2] = 0.0f;
   *out[3] = 1.0f;
   
   return out;
}

float ULK_matrix_2x2_determinant(const ULK_matrix_2x2 in)
{
   return in[0]*in[3]-in[2]*in[1];
}

int ULK_matrix_2x2_equals(const ULK_matrix_2x2 a, const ULK_matrix_2x2 b)
{
   return (fabs(a[0]-b[0])<=EPSILON*MAX(1.0f,MAX(fabs(a[0]),fabs(b[0])))&&fabs(a[1]-b[1])<=EPSILON*MAX(1.0f,MAX(fabs(a[1]),fabs(b[1])))&&fabs(a[2]-b[2])<=EPSILON*MAX(1.0f,MAX(fabs(a[2]),fabs(b[2])))&&fabs(a[3]-b[3])<=EPSILON*MAX(1.0f,MAX(fabs(a[3]),fabs(b[3]))));
}

int ULK_matrix_2x2_exact_equals(const ULK_matrix_2x2 a, const ULK_matrix_2x2 b)
{
   return a[0]==b[0]&&a[1]==b[1]&&a[2]==b[2]&&a[3]==b[3];
}

float ULK_matrix_2x2_frob(const ULK_matrix_2x2 in)
{
   return sqrtf(in[0]*in[0]+in[1]*in[1]+in[2]*in[2]+in[3]*in[3]);
}

void ULK_matrix_2x2_from_rotation(ULK_matrix_2x2 out, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);
   
   out[0] = c;
   out[1] = s;
   out[2] = -s;
   out[3] = c;
}

void ULK_matrix_2x2_from_scaling(ULK_matrix_2x2 out, const ULK_vector_2d scaling)
{
   out[0] = scaling[0];
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = scaling[1];
}

ULK_matrix_2x2 *ULK_matrix_2x2_from_values(float m00, float m01, float m10, float m11)
{
   ULK_matrix_2x2 *out = malloc(sizeof(ULK_matrix_2x2));
   
   *out[0] = m00;
   *out[1] = m01;
   *out[2] = m10;
   *out[3] = m11;
   
   return out;
}

void ULK_matrix_2x2_identity(ULK_matrix_2x2 out)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 1.0f;
}

void ULK_matrix_2x2_invert(ULK_matrix_2x2 out, const ULK_matrix_2x2 in)
{
   float in0 = in[0];
   float in1 = in[1];
   float in2 = in[2];
   float in3 = in[3];
   float determinant = in0*in3-in2*in1;
   
   if(determinant!=0.0f)
      determinant = 1.0f/determinant;
      
   out[0] = in3*determinant;
   out[1] = -in1*determinant;
   out[2] = -in2*determinant;
   out[3] = in0*determinant;
}

void ULK_matrix_2x2_LDU(ULK_matrix_2x2 L, ULK_matrix_2x2 D, ULK_matrix_2x2 U, const ULK_matrix_2x2 in)
{
   L[2] = in[2]/in[0];
   U[0] = in[0];
   U[1] = in[1];
   U[3] = in[3]-L[2]*U[1];
}

void ULK_matrix_2x2_multiply(ULK_matrix_2x2 out, const ULK_matrix_2x2 a, const ULK_matrix_2x2 b)
{
   float a0 = a[0];
   float a1 = a[1];
   float a2 = a[2];
   float a3 = a[3];
   float b0 = b[0];
   float b1 = b[1];
   float b2 = b[2];
   float b3 = b[3];
   
   out[0] = a0*b0+a2*b1;
   out[1] = a1*b0+a3*b1;
   out[2] = a0*b2+a2*b3;
   out[3] = a1*b2+a3*b3;
}

void ULK_matrix_2x2_multiply_scalar(ULK_matrix_2x2 out, const ULK_matrix_2x2 in, float scale)
{
   out[0] = in[0]*scale;
   out[1] = in[1]*scale;
   out[2] = in[2]*scale;
   out[3] = in[3]*scale;
}

void ULK_matrix_2x2_rotate(ULK_matrix_2x2 out, const ULK_matrix_2x2 in, float rad)
{
   float in0 = in[0];
   float in1 = in[1];
   float in2 = in[2];
   float in3 = in[3];
   float c = cosf(rad);
   float s = sinf(rad);
   
   out[0] = in0*c+in2*s;
   out[1] = in1*c+in3*s;
   out[2] = in0*(-s)+in2*c;
   out[3] = in1*(-s)+in3*c;
}

void ULK_matrix_2x2_scale(ULK_matrix_2x2 out, const ULK_matrix_2x2 in, const ULK_vector_2d scale)
{
   out[0] = in[0]*scale[0];
   out[1] = in[1]*scale[0];
   out[2] = in[2]*scale[1];
   out[3] = in[3]*scale[1];
}

void ULK_matrix_2x2_set(ULK_matrix_2x2 out, float m00, float m01, float m10, float m11)
{
   out[0] = m00;
   out[1] = m01;
   out[2] = m10;
   out[3] = m11;
}

void ULK_matrix_2x2_subtract(ULK_matrix_2x2 out, const ULK_matrix_2x2 a, const ULK_matrix_2x2 b)
{
   out[0] = a[0]-b[0];
   out[1] = a[1]-b[1];
   out[2] = a[2]-b[2];
   out[3] = a[3]-b[3];
}

void ULK_matrix_2x2_transpose(ULK_matrix_2x2 out, const ULK_matrix_2x2 in)
{
   float in1 = in[1];
   float in2 = in[2];
   
   out[0] = in[0];
   out[1] = in2;
   out[2] = in1;
   out[3] = in[3];
}

void ULK_matrix_2x3_add(ULK_matrix_2x3 out, const ULK_matrix_2x3 a, const ULK_matrix_2x3 b)
{
   out[0] = a[0]+b[0];
   out[1] = a[1]+b[1];
   out[2] = a[2]+b[2];
   out[3] = a[3]+b[3];
   out[4] = a[4]+b[4];
   out[5] = a[5]+b[5];
}

ULK_matrix_2x3 *ULK_matrix_2x3_clone(const ULK_matrix_2x3 in)
{
   ULK_matrix_2x3 *out = malloc(sizeof(ULK_matrix_2x3));

   *out[0] = in[0];
   *out[1] = in[1];
   *out[2] = in[2];
   *out[3] = in[3];
   *out[4] = in[4];
   *out[5] = in[5];

   return out;
}

void ULK_matrix_2x3_copy(ULK_matrix_2x3 dst, const ULK_matrix_2x3 src)
{
   dst[0] = src[0];
   dst[1] = src[1];
   dst[2] = src[2];
   dst[3] = src[3];
   dst[4] = src[4];
   dst[5] = src[5];
}

ULK_matrix_2x3 *ULK_matrix_2x3_create()
{
   ULK_matrix_2x3 *out = malloc(sizeof(ULK_matrix_2x3));

   *out[0] = 1.0f;
   *out[1] = 0.0f;
   *out[2] = 0.0f;
   *out[3] = 1.0f;
   *out[4] = 0.0f;
   *out[5] = 0.0f;

   return out;
}

float ULK_matrix_2x3_determinant(const ULK_matrix_2x3 in)
{
   return in[0]*in[3]-in[1]*in[2];
}

int ULK_matrix_2x3_equals(const ULK_matrix_2x3 a, const ULK_matrix_2x3 b)
{
   return (fabs(a[0]-b[0])<=EPSILON*MAX(1.0f,MAX(fabs(a[0]),fabs(b[0])))&&fabs(a[1]-b[1])<=EPSILON*MAX(1.0f,MAX(fabs(a[1]),fabs(b[1])))&&fabs(a[2]-b[2])<=EPSILON*MAX(1.0f,MAX(fabs(a[2]),fabs(b[2])))&&fabs(a[3]-b[3])<=EPSILON*MAX(1.0f,MAX(fabs(a[3]),fabs(b[3])))&&fabs(a[4]-b[4])<=EPSILON*MAX(1.0f,MAX(fabs(a[4]),fabs(b[4])))&&fabs(a[5]-b[5])<=EPSILON*MAX(1.0f,MAX(fabs(a[5]),fabs(b[5]))));
}

int ULK_matrix_2x3_exact_equals(const ULK_matrix_2x3 a, const ULK_matrix_2x3 b)
{
   return a[0]==b[0]&&a[1]==b[1]&&a[2]==b[2]&&a[3]==b[3]&&a[4]==b[4]&&a[5]==b[5];
}

float ULK_matrix_2x3_frob(const ULK_matrix_2x3 in)
{
   return sqrtf(in[0]*in[0]+in[1]*in[1]+in[2]*in[2]+in[3]*in[3]+in[4]*in[4]+in[5]*in[5]+1);
}

void ULK_matrix_2x3_from_rotation(ULK_matrix_2x3 out, float rad)
{
   float c = cosf(rad);
   float s = sinf(rad);

   out[0] = c;
   out[1] = s;
   out[2] = -s;
   out[3] = c;
   out[4] = 0.0f;
   out[5] = 0.0f;
}

void ULK_matrix_2x3_from_scaling(ULK_matrix_2x3 out, const ULK_vector_2d scaling)
{
   out[0] = scaling[0];
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = scaling[1];
   out[4] = 0.0f;
   out[5] = 0.0f;
}

void ULK_matrix_2x3_from_translation(ULK_matrix_2x3 out, const ULK_vector_2d translation)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 1.0f;
   out[4] = translation[0];
   out[5] = translation[1];
}

ULK_matrix_2x3 *ULK_matrix_2x3_from_values(float a, float b, float c, float d, float tx, float ty)
{
   ULK_matrix_2x3 *out = malloc(sizeof(ULK_matrix_2x3));

   *out[0] = a;
   *out[1] = b;
   *out[2] = c;
   *out[3] = d;
   *out[4] = tx;
   *out[5] = ty;

   return out;
}

void ULK_matrix_2x3_identity(ULK_matrix_2x3 out)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 1.0f;
   out[4] = 0.0f;
   out[5] = 0.0f;
}

void ULK_matrix_2x3_invert(ULK_matrix_2x3 out, const ULK_matrix_2x3 in)
{
   float ina = in[0];
   float inb = in[1];
   float inc = in[2];
   float ind = in[3];
   float intx = in[4];
   float inty = in[5];
   float determinant = ina*ind-inb*inc;

   if(determinant!=0.0f)
      determinant = 1.0f/determinant;

   out[0] = ind*determinant;
   out[1] = -inb*determinant;
   out[2] = -inc*determinant;
   out[3] = ina*determinant;
   out[4] = (inc*inty-ind*intx)*determinant;
   out[5] = (inb*intx-ina*inty)*determinant;
}

void ULK_matrix_2x3_multiply(ULK_matrix_2x3 out, const ULK_matrix_2x3 a, const ULK_matrix_2x3 b)
{
   float a0 = a[0];
   float a1 = a[1];
   float a2 = a[2];
   float a3 = a[3];
   float a4 = a[4];
   float a5 = a[5];
   float b0 = b[0];
   float b1 = b[1];
   float b2 = b[2];
   float b3 = b[3];
   float b4 = b[4];
   float b5 = b[5];

   out[0] = a0*b0+a2*b1;
   out[1] = a1*b0+a3*b1;
   out[2] = a0*b2+a2*b3;
   out[3] = a1*b2+a3*b3;
   out[4] = a0*b4+a2*b5+a4;
   out[5] = a1*b4+a3*b5+a5;
}

void ULK_matrix_2x3_multiply_scalar(ULK_matrix_2x3 out, const ULK_matrix_2x3 in, float scale)
{
   out[0] = in[0]*scale;
   out[1] = in[1]*scale;
   out[2] = in[2]*scale;
   out[3] = in[3]*scale;
   out[4] = in[4]*scale;
   out[5] = in[5]*scale;
}

void ULK_matrix_2x3_rotate(ULK_matrix_2x3 out, const ULK_matrix_2x3 in, float rad)
{
   float in0 = in[0];
   float in1 = in[1];
   float in2 = in[2];
   float in3 = in[3];
   float c = cosf(rad);
   float s = sinf(rad);

   out[0] = in0*c+in2*s;
   out[1] = in1*c+in3*s;
   out[2] = in0*(-s)+in2*c;
   out[3] = in1*(-s)+in3*c;
   out[4] = in[4];
   out[5] = in[5];
}

void ULK_matrix_2x3_scale(ULK_matrix_2x3 out, const ULK_matrix_2x3 in, const ULK_vector_2d scale)
{
   out[0] = in[0]*scale[0];
   out[1] = in[1]*scale[0];
   out[2] = in[2]*scale[1];
   out[3] = in[3]*scale[1];
   out[4] = in[4];
   out[5] = in[5];
}

void ULK_matrix_2x3_set(ULK_matrix_2x3 out, float a, float b, float c, float d, float tx, float ty)
{
   out[0] = a;
   out[1] = b;
   out[2] = c;
   out[3] = d;
   out[4] = tx;
   out[5] = ty;
}

void ULK_matrix_2x3_subtract(ULK_matrix_2x3 out, const ULK_matrix_2x3 a, const ULK_matrix_2x3 b)
{
   out[0] = a[0]-b[0];
   out[1] = a[1]-b[1];
   out[2] = a[2]-b[2];
   out[3] = a[3]-b[3];
   out[4] = a[4]-b[4];
   out[5] = a[5]-b[5];
}

void ULK_matrix_2x3_translate(ULK_matrix_2x3 out, const ULK_matrix_2x3 in, const ULK_vector_2d translation)
{
   out[0] = in[0];
   out[1] = in[1];
   out[2] = in[2];
   out[3] = in[3];
   out[4] = in[0]*translation[0]+in[2]*translation[1]+in[4];
   out[5] = in[1]*translation[0]+in[3]*translation[1]+in[5];
}

void ULK_matrix_3x3_add(ULK_matrix_3x3 out, const ULK_matrix_3x3 a, const ULK_matrix_3x3 b)
{
   out[0] = a[0]+b[0];
   out[1] = a[1]+b[1];
   out[2] = a[2]+b[2];
   out[3] = a[3]+b[3];
   out[4] = a[4]+b[4];
   out[5] = a[5]+b[5];
   out[6] = a[6]+b[6];
   out[7] = b[7]+a[7];
   out[8] = a[8]+b[8];
}

void ULK_matrix_3x3_adjoint(ULK_matrix_3x3 out, const ULK_matrix_3x3 in)
{
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in10 = in[3];
   float in11 = in[4];
   float in12 = in[5];
   float in20 = in[6];
   float in21 = in[7];
   float in22 = in[8];

   out[0] = in11*in22-in12*in21;
   out[1] = in02*in21-in01*in22;
   out[2] = in01*in12-in02*in11;
   out[3] = in12*in20-in10*in22;
   out[4] = in00*in22-in02*in20;
   out[5] = in02*in10-in00*in12;
   out[6] = in10*in21-in11*in20;
   out[7] = in01*in20-in00*in21;
   out[8] = in00*in11-in01*in10;
}

ULK_matrix_3x3 *ULK_matrix_3x3_clone(const ULK_matrix_3x3 in)
{
   ULK_matrix_3x3 *out = malloc(sizeof(ULK_matrix_3x3));

   *out[0] = in[0];
   *out[1] = in[1];
   *out[2] = in[2];
   *out[3] = in[3];
   *out[4] = in[4];
   *out[5] = in[5];
   *out[6] = in[6];
   *out[7] = in[7];
   *out[8] = in[8];

   return out;
}

void ULK_matrix_3x3_copy(ULK_matrix_3x3 out, const ULK_matrix_3x3 in)
{
   out[0] = in[0];
   out[1] = in[1];
   out[2] = in[2];
   out[3] = in[3];
   out[4] = in[4];
   out[5] = in[5];
   out[6] = in[6];
   out[7] = in[7];
   out[8] = in[8];
}

ULK_matrix_3x3 *ULK_matrix_3x3_create()
{
   ULK_matrix_3x3 *out = malloc(sizeof(ULK_matrix_3x3));

   *out[0] = 1.0f;
   *out[1] = 0.0f;
   *out[2] = 0.0f;
   *out[3] = 0.0f;
   *out[4] = 1.0f;
   *out[5] = 0.0f;
   *out[6] = 0.0f;
   *out[7] = 0.0f;
   *out[8] = 1.0f;

   return out;
}

float ULK_matrix_3x3_determinant(const ULK_matrix_3x3 in)
{
   return in[0]*(in[8]*in[4]-in[5]*in[7])+in[1]*(-in[8]*in[3]+in[5]*in[6])+in[2]*(in[7]*in[3]-in[4]*in[6]);
}

int ULK_matrix_3x3_equals(const ULK_matrix_3x3 a, const ULK_matrix_3x3 b)
{
   return (fabs(a[0]-b[0])<=EPSILON*MAX(1.0f,MAX(fabs(a[0]),fabs(b[0])))&&
      fabs(a[1]-b[1])<=EPSILON*MAX(1.0f,MAX(fabs(a[1]),fabs(b[1])))&&
      fabs(a[2]-b[2])<=EPSILON*MAX(1.0f,MAX(fabs(a[2]),fabs(b[2])))&&
      fabs(a[3]-b[3])<=EPSILON*MAX(1.0f,MAX(fabs(a[3]),fabs(b[3])))&&
      fabs(a[4]-b[4])<=EPSILON*MAX(1.0f,MAX(fabs(a[4]),fabs(b[4])))&&
      fabs(a[5]-b[5])<=EPSILON*MAX(1.0f,MAX(fabs(a[5]),fabs(b[5])))&&
      fabs(a[6]-b[6])<=EPSILON*MAX(1.0f,MAX(fabs(a[6]),fabs(b[6])))&&
      fabs(a[7]-b[7])<=EPSILON*MAX(1.0f,MAX(fabs(a[7]),fabs(b[7])))&&
      fabs(a[8]-b[8])<=EPSILON*MAX(1.0f,MAX(fabs(a[8]),fabs(b[8]))));
}

int ULK_matrix_3x3_exact_equals(const ULK_matrix_3x3 a, const ULK_matrix_3x3 b)
{
   return a[0]==b[0]&&
      a[1]==b[1]&&
      a[2]==b[2]&&
      a[3]==b[3]&&
      a[4]==b[4]&&
      a[5]==b[5]&&
      a[6]==b[6]&&
      a[7]==b[7]&&
      a[8]==b[8];
}

float ULK_matrix_3x3_frob(const ULK_matrix_3x3 in)
{
   return sqrtf(in[0]*in[0]+in[1]*in[1]+in[2]*in[2]+in[3]*in[3]+in[4]*in[4]+in[5]*in[5]+in[6]*in[6]+in[7]*in[7]+in[8]*in[8]);
}

void ULK_matrix_3x3_from_matrix_2x3(ULK_matrix_3x3 out, const ULK_matrix_2x3 in)
{
   out[0] = in[0];
   out[1] = in[1];
   out[2] = 0.0f;
   out[3] = in[2];
   out[4] = in[3];
   out[5] = 0.0f;
   out[6] = in[4];
   out[7] = in[5];
   out[8] = 1.0f;
}

void ULK_matrix_3x3_from_matrix_4x4(ULK_matrix_3x3 out, const ULK_matrix_4x4 in)
{
   out[0] = in[0];
   out[1] = in[1];
   out[2] = in[2];
   out[3] = in[4];
   out[4] = in[5];
   out[5] = in[6];
   out[6] = in[8];
   out[7] = in[9];
   out[8] = in[10];
}

void ULK_matrix_3x3_from_rotation(ULK_matrix_3x3 out, float rad)
{
   float c = cosf(rad);
   float s = sinf(rad);

   out[0] = c;
   out[1] = s;
   out[2] = 0.0f;
   out[3] = -s;
   out[4] = c;
   out[5] = 0.0f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 1.0f;
}

void ULK_matrix_3x3_from_scaling(ULK_matrix_3x3 out, const ULK_vector_2d scaling)
{
   out[0] = scaling[0];
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = scaling[1];
   out[5] = 0.0f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 1.0f;
}

void ULK_matrix_3x3_from_translation(ULK_matrix_3x3 out, const ULK_vector_2d translation)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = 0.0f;
   out[6] = translation[0];
   out[7] = translation[1];
   out[8] = 1.0f;
}

ULK_matrix_3x3 *ULK_matrix_3x3_from_values(float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22)
{
   ULK_matrix_3x3 *out = malloc(sizeof(ULK_matrix_3x3));

   *out[0] = m00;
   *out[1] = m01;
   *out[2] = m02;
   *out[3] = m10;
   *out[4] = m11;
   *out[5] = m12;
   *out[6] = m20;
   *out[7] = m21;
   *out[8] = m22;

   return out;
}

void ULK_matrix_3x3_identity(ULK_matrix_3x3 out)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 1.0f;
   out[5] = 0.0f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 1.0f;
}

void ULK_matrix_3x3_invert(ULK_matrix_3x3 out, const ULK_matrix_3x3 in)
{
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in10 = in[3];
   float in11 = in[4];
   float in12 = in[5];
   float in20 = in[6];
   float in21 = in[7];
   float in22 = in[8];

   float b01 = in22*in11-in12*in21;
   float b11 = -in22*in10+in12*in20;
   float b21 = in21*in10-in11*in20;

   float determinant = in00*b01+in01*b11+in02*b21;

   if(determinant!=0.0f)
      determinant = 1.0f/determinant;

   out[0] = b01*determinant;
   out[1] = (-in22*in01+in02*in21)*determinant;
   out[2] = (in12*in01-in02*in11)*determinant;
   out[3] = b11*determinant;
   out[4] = (in22*in00-in02*in20)*determinant;
   out[5] = (-in12*in00+in02*in10)*determinant;
   out[6] = b21*determinant;
   out[7] = (-in21*in00+in01*in20)*determinant;
   out[8] = (in11*in00-in01*in10)*determinant;
}

void ULK_matrix_3x3_multiply(ULK_matrix_3x3 out, const ULK_matrix_3x3 a, const ULK_matrix_3x3 b)
{
   float a00 = a[0];
   float a01 = a[1];
   float a02 = a[2];
   float a10 = a[3];
   float a11 = a[4];
   float a12 = a[5];
   float a20 = a[6];
   float a21 = a[7];
   float a22 = a[8];

   float b00 = b[0];
   float b01 = b[1];
   float b02 = b[2];
   float b10 = b[3];
   float b11 = b[4];
   float b12 = b[5];
   float b20 = b[6];
   float b21 = b[7];
   float b22 = b[8];

   out[0] = b00*a00+b01*a10+b02*a20;
   out[1] = b00*a01+b01*a11+b02*a21;
   out[2] = b00*a02+b01*a12+b02*a22;
   out[3] = b10*a00+b11*a10+b12*a20;
   out[4] = b10*a01+b11*a11+b12*a21;
   out[5] = b10*a02+b11*a12+b12*a22;
   out[6] = b20*a00+b21*a10+b22*a20;
   out[7] = b20*a01+b21*a11+b22*a21;
   out[8] = b20*a02+b21*a12+b22*a22;
}

void ULK_matrix_3x3_mutiply_scalar(ULK_matrix_3x3 out, const ULK_matrix_3x3 in, float scale)
{
   out[0] = in[0]*scale;
   out[1] = in[1]*scale;
   out[2] = in[2]*scale;
   out[3] = in[3]*scale;
   out[4] = in[4]*scale;
   out[5] = in[5]*scale;
   out[6] = in[6]*scale;
   out[7] = in[7]*scale;
   out[8] = in[8]*scale;
}

void ULK_matrix_3x3_normal_from_matrix_4x4(ULK_matrix_3x3 out, const ULK_matrix_4x4 in)
{
   float b00 = in[0]*in[5]-in[1]*in[4];
   float b01 = in[0]*in[6]-in[2]*in[4];
   float b02 = in[0]*in[7]-in[3]*in[4];
   float b03 = in[1]*in[6]-in[2]*in[5];
   float b04 = in[1]*in[7]-in[3]*in[5];
   float b05 = in[2]*in[7]-in[3]*in[6];
   float b06 = in[8]*in[13]-in[9]*in[12];
   float b07 = in[8]*in[14]-in[10]*in[12];
   float b08 = in[8]*in[15]-in[11]*in[12];
   float b09 = in[9]*in[14]-in[10]*in[13];
   float b10 = in[9]*in[15]-in[11]*in[13];
   float b11 = in[10]*in[15]-in[11]*in[14];

   float determinant = b00*b11-b01*b10-b02*b09-b03*b08-b04*b07+b05*b06;

   if(determinant!=0.0f)
      determinant = 1.0f/determinant;

   out[0] = (in[5]*b11-in[6]*b10+in[7]*b09)*determinant;
   out[1] = (in[6]*b08-in[4]*b11-in[7]*b07)*determinant;
   out[2] = (in[4]*b11-in[5]*b08+in[7]*b06)*determinant;
   out[3] = (in[2]*b10-in[1]*b11-in[3]*b09)*determinant;
   out[4] = (in[0]*b11-in[2]*b08+in[3]*b07)*determinant;
   out[5] = (in[1]*b08-in[0]*b10-in[3]*b06)*determinant;
   out[6] = (in[13]*b05-in[14]*b04+in[15]*b03)*determinant;
   out[7] = (in[14]*b02-in[12]*b05-in[15]*b01)*determinant;
   out[8] = (in[12]*b04-in[13]*b02+in[15]*b00)*determinant;
}

void ULK_matrix_3x3_projection(ULK_matrix_3x3 out, float width, float height)
{
   out[0] = 2.0f/width;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = -2.0f/height;
   out[5] = 0.0f;
   out[6] = -1.0f;
   out[7] = 1.0f;
   out[8] = 1.0f;
}

void ULK_matrix_3x3_rotate(ULK_matrix_3x3 out, const ULK_matrix_3x3 in, float rad)
{
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in10 = in[3];
   float in11 = in[4];
   float in12 = in[5];
   float in20 = in[6];
   float in21 = in[7];
   float in22 = in[8];

   float s = sinf(rad);
   float c = cosf(rad);

   out[0] = c*in00+s*in10;
   out[1] = c*in01+s*in11;
   out[2] = c*in02+s*in12;
   out[3] = c*in10-s*in00;
   out[4] = c*in11-s*in01;
   out[5] = c*in12-s*in02;
   out[6] = in20;
   out[7] = in21;
   out[8] = in22;
}

void ULK_matrix_3x3_scale(ULK_matrix_3x3 out, const ULK_matrix_3x3 in, const ULK_vector_2d scale)
{
   out[0] = scale[0]*in[0];
   out[1] = scale[0]*in[1];
   out[2] = scale[0]*in[2];
   out[3] = scale[1]*in[3];
   out[4] = scale[1]*in[4];
   out[5] = scale[1]*in[5];
   out[6] = in[6];
   out[7] = in[7];
   out[8] = in[8];
}

void ULK_matrix_3x3_set(ULK_matrix_3x3 out, float m00, float m01, float m02, float m10, float m11, float m12, float m20, float m21, float m22)
{
   out[0] = m00;
   out[1] = m01;
   out[2] = m02;
   out[3] = m10;
   out[4] = m11;
   out[5] = m12;
   out[6] = m20;
   out[7] = m21;
   out[8] = m22;
}

void ULK_matrix_3x3_subtract(ULK_matrix_3x3 out, const ULK_matrix_3x3 a, const ULK_matrix_3x3 b)
{
   out[0] = a[0]-b[0];
   out[1] = a[1]-b[1];
   out[2] = a[2]-b[2];
   out[3] = a[3]-b[3];
   out[4] = a[4]-b[4];
   out[5] = a[5]-b[5];
   out[6] = a[6]-b[6];
   out[7] = a[7]-b[7];
   out[8] = a[8]-b[8];
}

void ULK_matrix_3x3_translate(ULK_matrix_3x3 out, const ULK_matrix_3x3 in, const ULK_vector_2d translation)
{
   float in0 = in[0];
   float in1 = in[1];
   float in2 = in[2];
   float in3 = in[3];
   float in4 = in[4];
   float in5 = in[5];
   float in6 = in[6];
   float in7 = in[7];
   float in8 = in[8];

   out[0] = in0;
   out[1] = in1;
   out[2] = in2;
   out[3] = in3;
   out[4] = in4;
   out[5] = in5;
   out[6] = translation[0]*in0+translation[1]*in3+in6;
   out[7] = translation[0]*in1+translation[1]*in4+in7;
   out[8] = translation[0]*in2+translation[1]*in5+in8;
}

void ULK_matrix_3x3_transpose(ULK_matrix_3x3 out, const ULK_matrix_3x3 in)
{
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in10 = in[3];
   float in11 = in[4];
   float in12 = in[5];
   float in20 = in[6];
   float in21 = in[7];
   float in22 = in[8];

   out[0] = in00;
   out[1] = in10;
   out[2] = in20;
   out[3] = in01;
   out[4] = in11;
   out[5] = in21;
   out[6] = in02;
   out[7] = in12;
   out[8] = in22;
}

void ULK_matrix_4x4_add(ULK_matrix_4x4 out, const ULK_matrix_4x4 a, const ULK_matrix_4x4 b)
{
   out[0] = a[0]+b[0];
   out[1] = a[1]+b[1];
   out[2] = a[2]+b[2];
   out[3] = a[3]+b[3];
   out[4] = a[4]+b[4];
   out[5] = a[5]+b[5];
   out[6] = a[6]+b[6];
   out[7] = a[7]+b[7];
   out[8] = a[8]+b[8];
   out[9] = a[9]+b[9];
   out[10] = a[10]+b[10];
   out[11] = a[11]+b[11];
   out[12] = a[12]+b[12];
   out[13] = a[13]+b[13];
   out[14] = a[14]+b[14];
   out[15] = a[15]+b[15];
}

void ULK_matrix_4x4_adjoint(ULK_matrix_4x4 out, const ULK_matrix_4x4 in)
{
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in03 = in[3];
   float in10 = in[4];
   float in11 = in[5];
   float in12 = in[6];
   float in13 = in[7];
   float in20 = in[8];
   float in21 = in[9];
   float in22 = in[10];
   float in23 = in[11];
   float in30 = in[12];
   float in31 = in[13];
   float in32 = in[14];
   float in33 = in[15];

   out[0] = in11*(in22*in33-in23*in32)-in21*(in12*in33-in13*in32)+in31*(in12*in23-in13*in22);
   out[1] = -(in01*(in22*in33-in23*in32)-in21*(in02*in33-in03*in32)+in31*(in02*in23-in03*in22));
   out[2] = in01*(in12*in33-in13*in32)-in11*(in02*in33-in03*in32)+in31*(in02*in13+in03*in12);
   out[3] = -(in01*(in12*in23-in13*in22)-in11*(in02*in23-in03*in22)+in21*(in02*in13-in03*in12));
   out[4] = -(in10*(in22*in33-in23*in32)-in20*(in12*in33-in13*in32)+in30*(in12*in23-in13*in22));
   out[5] = in00*(in22*in33-in23*in32)-in20*(in02*in33-in03*in32)+in30*(in02*in23-in03*in22);
   out[6] = -(in00*(in12*in33-in13*in32)-in10*(in02*in33-in03*in32)+in30*(in02*in13-in03*in12));
   out[7] = in00*(in12*in23-in13*in22)-in10*(in02*in23-in03*in22)+in20*(in02*in13-in03*in12);
   out[8] = in10*(in21*in33-in23*in31)-in20*(in11*in33-in13*in31)+in30*(in11*in23-in13*in21);
   out[9] = -(in00*(in21*in33-in23*in31)-in20*(in01*in33-in03*in31)+in30*(in01*in23-in03*in21));
   out[10] = in00*(in11*in33-in13*in31)-in10*(in01*in33-in03*in31)+in30*(in01*in13-in03*in11);
   out[11] = -(in00*(in11*in23-in13*in21)-in10*(in01*in23-in03*in21)+in20*(in01*in13-in03*in11));
   out[12] = -(in10*(in21*in32-in22*in31)-in20*(in11*in32-in12*in31)+in30*(in11*in22*in12*in21));
   out[13] = in00*(in21*in32-in22*in31)-in20*(in01*in32-in02*in31)+in30*(in01*in22-in02*in21);
   out[14] = -(in00*(in11*in32-in12*in31)-in10*(in01*in32-in02*in31)+in30*(in01*in12-in02*in11));
   out[15] = in00*(in11*in22-in12*in21)-in10*(in01*in22-in02*in21)+in20*(in01*in12-in02*in11);
}

ULK_matrix_4x4 *ULK_matrix_4x4_clone(const ULK_matrix_4x4 in)
{
   ULK_matrix_4x4 *out = malloc(sizeof(ULK_matrix_4x4));

   *out[0] = in[0];
   *out[1] = in[1];
   *out[2] = in[2];
   *out[3] = in[3];
   *out[4] = in[4];
   *out[5] = in[5];
   *out[6] = in[6];
   *out[7] = in[7];
   *out[8] = in[8];
   *out[9] = in[9];
   *out[10] = in[10];
   *out[11] = in[11];
   *out[12] = in[12];
   *out[13] = in[13];
   *out[14] = in[14];
   *out[15] = in[15];

   return out;
}

void ULK_matrix_4x4_copy(ULK_matrix_4x4 out, const ULK_matrix_4x4 in)
{
   out[0] = in[0];
   out[1] = in[1];
   out[2] = in[2];
   out[3] = in[3];
   out[4] = in[4];
   out[5] = in[5];
   out[6] = in[6];
   out[7] = in[7];
   out[8] = in[8];
   out[9] = in[9];
   out[10] = in[10];
   out[11] = in[11];
   out[12] = in[12];
   out[13] = in[13];
   out[14] = in[14];
   out[15] = in[15];
}

ULK_matrix_4x4 *ULK_matrix_4x4_create()
{
   ULK_matrix_4x4 *out = malloc(sizeof(ULK_matrix_4x4));

   *out[0] = 1.0f;
   *out[1] = 0.0f;
   *out[2] = 0.0f;
   *out[3] = 0.0f;
   *out[4] = 0.0f;
   *out[5] = 1.0f;
   *out[6] = 0.0f;
   *out[7] = 0.0f;
   *out[8] = 0.0f;
   *out[9] = 0.0f;
   *out[10] = 1.0f;
   *out[11] = 0.0f;
   *out[12] = 0.0f;
   *out[13] = 0.0f;
   *out[14] = 0.0f;
   *out[15] = 1.0f;

   return out;
}

float ULK_matrix_4x4_determinant(const ULK_matrix_4x4 in)
{
   float b00 = in[0]*in[5]-in[1]*in[4];
   float b01 = in[0]*in[6]-in[2]*in[4];
   float b02 = in[0]*in[7]-in[3]*in[4];
   float b03 = in[1]*in[6]-in[2]*in[5];
   float b04 = in[1]*in[7]-in[3]*in[5];
   float b05 = in[2]*in[7]-in[3]*in[6];
   float b06 = in[8]*in[13]-in[9]*in[12];
   float b07 = in[8]*in[14]-in[10]*in[12];
   float b08 = in[8]*in[15]-in[11]*in[12];
   float b09 = in[9]*in[14]-in[10]*in[13];
   float b10 = in[9]*in[15]-in[11]*in[13];
   float b11 = in[10]*in[15]-in[11]*in[14];

   return b00*b11-b01*b10+b02*b09+b03*b08-b04*b07+b05*b06;
}

int ULK_matrix_4x4_equals(const ULK_matrix_4x4 a, const ULK_matrix_4x4 b)
{
   return (fabs(a[0]-b[0])<=EPSILON*MAX(1.0f,MAX(fabs(a[0]),fabs(b[0])))&&
      fabs(a[1]-b[1])<=EPSILON*MAX(1.0f,MAX(fabs(a[1]),fabs(b[1])))&&
      fabs(a[2]-b[2])<=EPSILON*MAX(1.0f,MAX(fabs(a[2]),fabs(b[2])))&&
      fabs(a[3]-b[3])<=EPSILON*MAX(1.0f,MAX(fabs(a[3]),fabs(b[3])))&&
      fabs(a[4]-b[4])<=EPSILON*MAX(1.0f,MAX(fabs(a[4]),fabs(b[4])))&&
      fabs(a[5]-b[5])<=EPSILON*MAX(1.0f,MAX(fabs(a[5]),fabs(b[5])))&&
      fabs(a[6]-b[6])<=EPSILON*MAX(1.0f,MAX(fabs(a[6]),fabs(b[6])))&&
      fabs(a[7]-b[7])<=EPSILON*MAX(1.0f,MAX(fabs(a[7]),fabs(b[7])))&&
      fabs(a[8]-b[8])<=EPSILON*MAX(1.0f,MAX(fabs(a[8]),fabs(b[8])))&&
      fabs(a[9]-b[9])<=EPSILON*MAX(1.0f,MAX(fabs(a[9]),fabs(b[9])))&&
      fabs(a[10]-b[10])<=EPSILON*MAX(1.0f,MAX(fabs(a[10]),fabs(b[10])))&&
      fabs(a[11]-b[11])<=EPSILON*MAX(1.0f,MAX(fabs(a[11]),fabs(b[11])))&&
      fabs(a[12]-b[12])<=EPSILON*MAX(1.0f,MAX(fabs(a[12]),fabs(b[12])))&&
      fabs(a[13]-b[13])<=EPSILON*MAX(1.0f,MAX(fabs(a[13]),fabs(b[13])))&&
      fabs(a[14]-b[14])<=EPSILON*MAX(1.0f,MAX(fabs(a[14]),fabs(b[14])))&&
      fabs(a[15]-b[15])<=EPSILON*MAX(1.0f,MAX(fabs(a[15]),fabs(b[15]))));
}

int ULK_matrix_4x4_exact_equals(const ULK_matrix_4x4 a, const ULK_matrix_4x4 b)
{
   return (a[0]==b[0]&&
      a[1]==b[1]&&
      a[2]==b[2]&&
      a[3]==b[3]&&
      a[4]==b[4]&&
      a[5]==b[5]&&
      a[6]==b[6]&&
      a[7]==b[7]&&
      a[8]==b[8]&&
      a[9]==b[9]&&
      a[10]==b[10]&&
      a[11]==b[11]&&
      a[12]==b[12]&&
      a[13]==b[13]&&
      a[14]==b[14]&&
      a[15]==b[15]);
}

float ULK_matrix_4x4_frob(const ULK_matrix_4x4 in)
{
   return sqrtf(in[0]*in[0]+in[1]*in[1]+in[2]*in[2]+in[3]*in[3]+in[4]*in[4]+in[5]*in[5]+in[6]*in[6]+in[7]*in[7]+in[8]*in[8]+in[9]*in[9]+in[10]*in[10]+in[11]*in[11]+in[12]*in[12]+in[13]*in[13]+in[14]*in[14]+in[15]*in[15]);
}

void ULK_matrix_4x4_from_rotation(ULK_matrix_4x4 out, float rad, const ULK_vector_3d axis)
{
   float length = sqrtf(axis[0]*axis[0]+axis[1]*axis[1]+axis[2]*axis[2]);
   float s = sinf(rad);
   float c = cosf(rad);
   float t = 1.0f-c;

   if(length<EPSILON)
      return;

   length = 1.0f/length;
   float x = axis[0]*length;
   float y = axis[1]*length;
   float z = axis[2]*length;

   out[0] = x*x*t+c;
   out[1] = y*x*t+z*s;
   out[2] = z*x*t-y*s;
   out[3] = 0.0f;
   out[4] = x*y*t-z*s;
   out[5] = y*x*t+c;
   out[6] = z*y*t+x*s;
   out[7] = 0.0f;
   out[8] = x*z*t+y*s;
   out[9] = y*z*t-x*s;
   out[10] = z*z*t+c;
   out[11] = 0.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 0.0f;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_from_scaling(ULK_matrix_4x4 out, const ULK_vector_3d scaling)
{
   out[0] = scaling[0];
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = scaling[1];
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = 0.0f;
   out[10] = scaling[2];
   out[11] = 0.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 0.0f;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_fom_translation(ULK_matrix_4x4 out, const ULK_vector_3d translation)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = 1.0f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = 0.0f;
   out[10] = 1.0f;
   out[11] = 0.0f;
   out[12] = translation[0];
   out[13] = translation[1];
   out[14] = translation[2];
   out[15] = 1.0f;
}

ULK_matrix_4x4 *ULK_matrix_4x4_from_values(float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20, float m21, float m22, float m23, float m30, float m31, float m32, float m33)
{
   ULK_matrix_4x4 *out = malloc(sizeof(ULK_matrix_4x4));

   *out[0] = m00;
   *out[1] = m01;
   *out[2] = m02;
   *out[3] = m03;
   *out[4] = m10;
   *out[5] = m11;
   *out[6] = m12;
   *out[7] = m13;
   *out[8] = m20;
   *out[9] = m21;
   *out[10] = m22;
   *out[11] = m23;
   *out[12] = m30;
   *out[13] = m31;
   *out[14] = m32;
   *out[15] = m33;

   return out;
}

void ULK_matrix_4x4_from_xrotation(ULK_matrix_4x4 out, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);

   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = c;
   out[6] = s;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = -s;
   out[10] = c;
   out[11] = 0.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 0.0f;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_from_yrotation(ULK_matrix_4x4 out, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);

   out[0] = c;
   out[1] = 0.0f;
   out[2] = -s;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = 1.0f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = s;
   out[9] = 0.0f;
   out[10] = c;
   out[11] = 0.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 0.0f;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_from_zrotation(ULK_matrix_4x4 out, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);

   out[0] = c;
   out[1] = s;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = -s;
   out[5] = c;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = 0.0f;
   out[10] = 1.0f;
   out[11] = 0.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 0.0f;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_frustum(ULK_matrix_4x4 out, float left, float right, float bottom, float top, float near, float far)
{
   float rl = 1.0f/(right-left);
   float tb = 1.0f/(top-bottom);
   float nf = 1.0f/(near-far);

   out[0] = 2.0f*near*rl;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = 2.0f*near*tb;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = rl*(right+left);
   out[9] = tb*(top+bottom);
   out[10] = nf*(far+near);
   out[11] = -1.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 2.0f*far*near*nf;
   out[15] = 0.0f;
}

void ULK_matrix_4x4_identity(ULK_matrix_4x4 out)
{
   out[0] = 1.0f;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = 1.0f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = 0.0f;
   out[10] = 1.0f;
   out[11] = 0.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 0.0f;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_invert(ULK_matrix_4x4 out, const ULK_matrix_4x4 in)
{
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in03 = in[3];
   float in10 = in[4];
   float in11 = in[5];
   float in12 = in[6];
   float in13 = in[7];
   float in20 = in[8];
   float in21 = in[9];
   float in22 = in[10];
   float in23 = in[11];
   float in30 = in[12];
   float in31 = in[13];
   float in32 = in[14];
   float in33 = in[15];

   float b00 = in00*in11-in01*in10;
   float b01 = in00*in12-in02*in10;
   float b02 = in00*in13-in03*in10;
   float b03 = in01*in12-in02*in11;
   float b04 = in01*in13-in03*in11;
   float b05 = in02*in13-in03*in12;
   float b06 = in20*in31-in21*in30;
   float b07 = in20*in32-in22*in30;
   float b08 = in20*in33-in23*in30;
   float b09 = in21*in32-in22*in31;
   float b10 = in21*in33-in23*in31;
   float b11 = in22*in33-in23*in32;

   float determinant = b00*b11-b01*b10+b02*b09+b03*b08-b04*b07+b05*b06;

   if(determinant!=0.0f)
         determinant = 1.0f/determinant;

   out[0] = (in11*b11-in12*b10+in13*b09)*determinant;
   out[1] = (in02*b10-in01*b11-in03*b09)*determinant;
   out[2] = (in31*b05-in32*b04+in33*b03)*determinant;
   out[3] = (in22*b04-in21*b05-in23*b03)*determinant;
   out[4] = (in12*b08-in10*b11-in13*b07)*determinant;
   out[5] = (in00*b11-in02*b08+in03*b07)*determinant;
   out[6] = (in32*b02-in30*b05-in33*b01)*determinant;
   out[7] = (in20*b05-in22*b02+in23*b01)*determinant;
   out[8] = (in10*b10-in11*b08+in13*b06)*determinant;
   out[9] = (in01*b08-in00*b10-in03*b06)*determinant;
   out[10] = (in30*b04-in31*b02+in33*b00)*determinant;
   out[11] = (in21*b02-in20*b04-in23*b00)*determinant;
   out[12] = (in11*b07-in10*b09-in12*b06)*determinant;
   out[13] = (in00*b09-in01*b07+in02*b06)*determinant;
   out[14] = (in31*b01-in30*b03-in32*b00)*determinant;
   out[15] = (in20*b03-in21*b01+in22*b00)*determinant;
}

void ULK_matrix_4x4_look_at(ULK_matrix_4x4 out, const ULK_vector_3d eye, const ULK_vector_3d center, const ULK_vector_3d up)
{
   float z0 = eye[0]-center[0];
   float z1 = eye[1]-center[1];
   float z2 = eye[2]-center[2];

   if(fabs(z0)<EPSILON&&fabs(z1)<EPSILON&&fabs(z2)<EPSILON)
   {
      ULK_matrix_4x4_identity(out);
      return;
   }
   
   float length = 1.0f/sqrtf(z0*z0+z1*z1+z2*z2);
   z0*=length;
   z1*=length;
   z2*=length;
   float x0 = up[1]*z2-up[2]*z1;
   float x1 = up[2]*z0-up[0]*z2;
   float x2 = up[0]*z1-up[1]*z0;
   length = sqrtf(x0*x0+x1*x1+x2*x2);

   if(length!=0.0f)
      length = 1.0f/length;

   x0*=length;
   x1*=length;
   x2*=length;
   float y0 = z1*x2-z2*x1;
   float y1 = z2*x0-z0*x2;
   float y2 = z0*x1-z1*x0;
   length = sqrtf(y0*y0+y1*y1+y2*y2);

   if(length!=0.0f)
      length = 1.0f/length;

   y0*=length;
   y1*=length;
   y2*=length;

   out[0] = x0;
   out[1] = y0;
   out[2] = z0;
   out[3] = 0.0f;
   out[4] = x1;
   out[5] = y1;
   out[6] = z1;
   out[7] = 0.0f;
   out[8] = x2;
   out[9] = y2;
   out[10] = z2;
   out[11] = 0.0f;
   out[12] = -(x0*eye[0]+x1*eye[1]+x2*eye[2]);
   out[13] = -(y0*eye[0]+y1*eye[1]+y2*eye[2]);
   out[14] = -(z0*eye[0]+z1*eye[1]+z2*eye[2]);
   out[15] = 1.0f;
}

void ULK_matrix_4x4_multiply(ULK_matrix_4x4 out, const ULK_matrix_4x4 a, const ULK_matrix_4x4 b)
{
   float a00 = a[0];
   float a01 = a[1];
   float a02 = a[2];
   float a03 = a[3];
   float a10 = a[4];
   float a11 = a[5];
   float a12 = a[6];
   float a13 = a[7];
   float a20 = a[8];
   float a21 = a[9];
   float a22 = a[10];
   float a23 = a[11];
   float a30 = a[12];
   float a31 = a[13];
   float a32 = a[14];
   float a33 = a[15];

   float b0 = b[0];
   float b1 = b[1];
   float b2 = b[2];
   float b3 = b[3];

   out[0] = b0*a00+b1*a10+b2*a20+b3*a30;
   out[1] = b0*a01+b1*a11+b2*a21+b3*a31;
   out[2] = b0*a02+b1*a12+b2*a22+b3*a32;
   out[3] = b0*a03+b1*a13+b2*a23+b3*a33;

   b0 = b[4];
   b1 = b[5];
   b2 = b[6];
   b3 = b[7];

   out[4] = b0*a00+b1*a10+b2*a20+b3*a30;
   out[5] = b0*a01+b1*a11+b2*a21+b3*a31;
   out[6] = b0*a02+b1*a12+b2*a22+b3*a32;
   out[7] = b0*a03+b1*a13+b2*a23+b3*a33;

   b0 = b[8];
   b1 = b[9];
   b2 = b[10];
   b3 = b[11];

   out[8] = b0*a00+b1*a10+b2*a20+b3*a30;
   out[9] = b0*a01+b1*a11+b2*a21+b3*a31;
   out[10] = b0*a02+b1*a12+b2*a22+b3*a32;
   out[11] = b0*a03+b1*a13+b2*a23+b3*a33;

   b0 = b[12];
   b1 = b[13];
   b2 = b[14];
   b3 = b[15];

   out[12] = b0*a00+b1*a10+b2*a20+b3*a30;
   out[13] = b0*a01+b1*a11+b2*a21+b3*a31;
   out[14] = b0*a02+b1*a12+b2*a22+b3*a32;
   out[15] = b0*a03+b1*a13+b2*a23+b3*a33;
}

void ULK_matrix_4x4_multiply_scalar(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, float scale)
{
   out[0] = in[0]*scale;
   out[1] = in[1]*scale;
   out[2] = in[2]*scale;
   out[3] = in[3]*scale;
   out[4] = in[4]*scale;
   out[5] = in[5]*scale;
   out[6] = in[6]*scale;
   out[7] = in[7]*scale;
   out[8] = in[8]*scale;
   out[9] = in[9]*scale;
   out[10] = in[10]*scale;
   out[11] = in[11]*scale;
   out[12] = in[12]*scale;
   out[13] = in[13]*scale;
   out[14] = in[14]*scale;
   out[15] = in[15]*scale;
}

void ULK_matrix_4x4_ortho(ULK_matrix_4x4 out, float left, float right, float bottom, float top, float near, float far)
{
   float lr = 1.0f/(left-right);
   float bt = 1.0f/(bottom-top);
   float nf = 1.0f/(near-far);

   out[0] = -2.0f*lr;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = -2.0f*bt;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = 0.0f;
   out[10] = 2.0f*nf;
   out[11] = 0.0f;
   out[12] = (left+right)*lr;
   out[13] = (top+bottom)*bt;
   out[14] = (far+near)*nf;
   out[15] = 1.0f;
}

void ULK_matrix_4x4_perspective(ULK_matrix_4x4 out, float fovy, float aspect, float near, float far)
{
   float f = 1.0f/tanf(fovy/2.0f);
   float nf = 1.0f/(near-far);

   out[0] = f/aspect;
   out[1] = 0.0f;
   out[2] = 0.0f;
   out[3] = 0.0f;
   out[4] = 0.0f;
   out[5] = f;
   out[6] = 0.0f;
   out[7] = 0.0f;
   out[8] = 0.0f;
   out[9] = 0.0f;
   out[10] = (far+near)*nf;
   out[11] = -1.0f;
   out[12] = 0.0f;
   out[13] = 0.0f;
   out[14] = 2.0f*far*near*nf;
   out[15] = 0.0f;
}

void ULK_matrix_4x4_rotate(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, float rad, const ULK_vector_3d axis)
{
   float length = sqrtf(axis[0]*axis[0]+axis[1]*axis[1]+axis[2]*axis[2]);

   if(length<EPSILON)
      return;

   length = 1.0f/length;
   float x = axis[0]*length;
   float y = axis[1]*length;
   float z = axis[2]*length;
   float s = sinf(rad);
   float c = cosf(rad);
   float t = 1.0f-c;
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in03 = in[3];
   float in10 = in[4];
   float in11 = in[5];
   float in12 = in[6];
   float in13 = in[7];
   float in20 = in[8];
   float in21 = in[9];
   float in22 = in[10];
   float in23 = in[11];
   float b00 = x*x*t+c;
   float b01 = y*x*t+z*s;
   float b02 = z*x*t-y*s;
   float b10 = x*y*t-z*s;
   float b11 = y*y*t+c;
   float b12 = z*y*t+x*s;
   float b20 = x*z*t+y*s;
   float b21 = y*z*t-x*s;
   float b22 = z*z*t+c;

   out[0] = in00*b00+in10*b01+in20*b02;
   out[1] = in01*b00+in11*b01+in21*b02;
   out[2] = in02*b00+in12*b01+in22*b02;
   out[3] = in03*b00+in13*b01+in23*b02;
   out[4] = in00*b10+in10*b11+in20*b12;
   out[5] = in01*b10+in11*b11+in21*b12;
   out[6] = in02*b10+in12*b11+in22*b12;
   out[7] = in03*b10+in13*b11+in23*b12;
   out[8] = in00*b20+in10*b21+in20*b22;
   out[9] = in01*b20+in11*b21+in21*b22;
   out[10] = in02*b20+in12*b21+in22*b22;
   out[11] = in03*b20+in13*b21+in23*b22;
   out[12] = in[12];
   out[13] = in[13];
   out[14] = in[14];
   out[15] = in[15];
}

void ULK_matrix_4x4_rotate_x(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);
   float in10 = in[4];
   float in11 = in[5];
   float in12 = in[6];
   float in13 = in[7];
   float in20 = in[8];
   float in21 = in[9];
   float in22 = in[10];
   float in23 = in[11];

   out[0] = in[0];
   out[1] = in[1];
   out[2] = in[2];
   out[3] = in[3];
   out[4] = in10*c+in20*s;
   out[5] = in11*c+in21*s;
   out[6] = in12*c+in22*s;
   out[7] = in13*c+in23*s;
   out[8] = in20*c-in10*s;
   out[9] = in21*c-in11*s;
   out[10] = in22*c-in12*s;
   out[11] = in23*c-in13*s;
   out[12] = in[12];
   out[13] = in[13];
   out[14] = in[14];
   out[15] = in[15];
}

void ULK_matrix_4x4_rotate_y(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in03 = in[3];
   float in20 = in[8];
   float in21 = in[9];
   float in22 = in[10];
   float in23 = in[11];

   out[0] = in00*c-in20*s;
   out[1] = in01*c-in21*s;
   out[2] = in02*c-in22*s;
   out[3] = in03*c-in23*s;
   out[4] = in[4];
   out[5] = in[5];
   out[6] = in[6];
   out[7] = in[7];
   out[8] = in00*s+in20*c;
   out[9] = in01*s+in21*c;
   out[10] = in02*s+in22*c;
   out[11] = in03*s+in23*c;
   out[12] = in[12];
   out[13] = in[13];
   out[14] = in[14];
   out[15] = in[15];
}

void ULK_matrix_4x4_rotate_z(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, float rad)
{
   float s = sinf(rad);
   float c = cosf(rad);
   float in00 = in[0];
   float in01 = in[1];
   float in02 = in[2];
   float in03 = in[3];
   float in10 = in[4];
   float in11 = in[5];
   float in12 = in[6];
   float in13 = in[7];

   out[0] = in00*c+in10*s;
   out[1] = in01*c+in11*s;
   out[2] = in02*c+in12*s;
   out[3] = in03*c+in13*s;
   out[4] = in10*c-in00*s;
   out[5] = in11*c-in01*s;
   out[6] = in12*c-in02*s;
   out[7] = in13*c-in03*s;
   out[8] = in[8];
   out[9] = in[9];
   out[10] = in[10];
   out[11] = in[11];
   out[12] = in[12];
   out[13] = in[13];
   out[14] = in[14];
   out[15] = in[15];
}

void ULK_matrix_4x4_scale(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, const ULK_vector_3d scale)
{
   out[0] = in[0]*scale[0];
   out[1] = in[1]*scale[0];
   out[2] = in[2]*scale[0];
   out[3] = in[3]*scale[0];
   out[4] = in[4]*scale[1];
   out[5] = in[5]*scale[1];
   out[6] = in[6]*scale[1];
   out[7] = in[7]*scale[1];
   out[8] = in[8]*scale[2];
   out[9] = in[9]*scale[2];
   out[10] = in[10]*scale[2];
   out[11] = in[11]*scale[2];
   out[12] = in[12];
   out[13] = in[13];
   out[14] = in[14];
   out[15] = in[15];
}

void ULK_matrix_4x4_set(ULK_matrix_4x4 out, float m00, float m01, float m02, float m03, float m10, float m11, float m12, float m13, float m20, float m21, float m22, float m23, float m30, float m31, float m32, float m33)
{
   out[0] = m00;
   out[1] = m01;
   out[2] = m02;
   out[3] = m03;
   out[4] = m10;
   out[5] = m11;
   out[6] = m12;
   out[7] = m13;
   out[8] = m20;
   out[9] = m21;
   out[10] = m22;
   out[11] = m23;
   out[12] = m30;
   out[13] = m31;
   out[14] = m32;
   out[15] = m33;
}

void ULK_matrix_4x4_subtract(ULK_matrix_4x4 out, const ULK_matrix_4x4 a, const ULK_matrix_4x4 b)
{
   out[0] = a[0]-b[0];
   out[1] = a[1]-b[1];
   out[2] = a[2]-b[2];
   out[3] = a[3]-b[3];
   out[4] = a[4]-b[4];
   out[5] = a[5]-b[5];
   out[6] = a[6]-b[6];
   out[7] = a[7]-b[7];
   out[8] = a[8]-b[8];
   out[9] = a[9]-b[9];
   out[10] = a[10]-b[10];
   out[11] = a[11]-b[11];
   out[12] = a[12]-b[12];
   out[13] = a[13]-b[13];
   out[14] = a[14]-b[14];
   out[15] = a[15]-b[15];
}

void ULK_matrix_4x4_target_to(ULK_matrix_4x4 out, const ULK_vector_3d eye, const ULK_vector_3d target, const ULK_vector_3d up)
{
   float z0 = eye[0]-target[0];
   float z1 = eye[1]-target[1];
   float z2 = eye[2]-target[2];
   float length = z0*z0+z1*z1+z2*z2;

   if(length!=0.0f)
   {
      length = 1.0f/sqrtf(length);
      z0*=length;
      z1*=length;
      z2*=length;
   }

   float x0 = up[1]*z2-up[2]*z1;
   float x1 = up[2]*z0-up[0]*z2;
   float x2 = up[0]*z1-up[1]*z0;
   length = x0*x0+x1*x1+x2*x2;

   if(length!=0.0f)
   {
      length = 1.0f/sqrtf(length);
      x0*=length;
      x1*=length;
      x2*=length;
   }

   out[0] = x0;
   out[1] = x1;
   out[2] = x2;
   out[3] = 0.0f;
   out[4] = z1*x2-z2*x1;
   out[5] = z2*x0-z0*x2;
   out[6] = z0*x1-z1*x0;
   out[7] = 0.0f;
   out[8] = z0;
   out[9] = z1;
   out[10] = z2;
   out[11] = 0.0f;
   out[12] = eye[0];
   out[13] = eye[1];
   out[14] = eye[2];
   out[15] = 1.0f;
}

void ULK_matrix_4x4_translate(ULK_matrix_4x4 out, const ULK_matrix_4x4 in, const ULK_vector_3d translation)
{
   out[0] = in[0];
   out[1] = in[1];
   out[2] = in[2];
   out[3] = in[3];
   out[4] = in[4];
   out[5] = in[5];
   out[6] = in[6];
   out[7] = in[7];
   out[8] = in[8];
   out[9] = in[9];
   out[10] = in[10];
   out[11] = in[11];
   out[12] = in[0]*translation[0]+in[4]*translation[1]+in[8]*translation[2]+in[12];
   out[13] = in[1]*translation[0]+in[5]*translation[1]+in[9]*translation[2]+in[13];
   out[14] = in[2]*translation[0]+in[6]*translation[1]+in[10]*translation[2]+in[14];
   out[15] = in[3]*translation[0]+in[7]*translation[1]+in[11]*translation[2]+in[15];
}

void ULK_matrix_4x4_transpose(ULK_matrix_4x4 out, const ULK_matrix_4x4 in)
{
   float in01 = in[1];
   float in02 = in[2];
   float in03 = in[3];
   float in12 = in[6];
   float in13 = in[7];
   float in23 = in[11];

   out[0] = in[0];
   out[1] = in[4];
   out[2] = in[8];
   out[3] = in[12];
   out[4] = in01;
   out[5] = in[5];
   out[6] = in[9];
   out[7] = in[13];
   out[8] = in02;
   out[9] = in12;
   out[10] = in[10];
   out[11] = in[14];
   out[12] = in03;
   out[13] = in13;
   out[14] = in23;
   out[15] = in[15];
}

void ULK_vector_2d_transform_matrix_2x2(ULK_vector_2d out, const ULK_vector_2d in, const ULK_matrix_2x2 m)
{
   float x = in[0];
   float y = in[1];

   out[0] = m[0]*x+m[2]*y;
   out[1] = m[1]*x+m[3]*y;
}

void ULK_vector_2d_transform_matrix_2x3(ULK_vector_2d out, const ULK_vector_2d in, const ULK_matrix_2x3 m)
{
   float x = in[0];
   float y = in[1];

   out[0] = m[0]*x+m[2]*y+m[4];
   out[1] = m[1]*x+m[3]*y+m[5];
}

void ULK_vector_2d_transform_matrix_3x3(ULK_vector_2d out, const ULK_vector_2d in, const ULK_matrix_3x3 m)
{
   float x = in[0];
   float y = in[1];

   out[0] = m[0]*x+m[3]*y+m[6];
   out[1] = m[1]*x+m[4]*y+m[7];
}

void ULK_vector_2d_transform_matrix_4x4(ULK_vector_2d out, const ULK_vector_2d in, const ULK_matrix_4x4 m)
{
   float x = in[0];
   float y = in[1];

   out[0] = m[0]*x+m[4]*y+m[12];
   out[1] = m[1]*x+m[5]*y+m[13];
}

void ULK_vector_3d_transform_matrix_3x3(ULK_vector_3d out, const ULK_vector_3d in, const ULK_matrix_3x3 m)
{
   float x = in[0];
   float y = in[1];
   float z = in[2];

   out[0] = m[0]*x+m[3]*y+m[6]*z;
   out[1] = m[1]*x+m[4]*y+m[7]*z;
   out[2] = m[2]*x+m[5]*y+m[8]*z;
}

void ULK_vector_3d_transform_matrix_4x4(ULK_vector_3d out, const ULK_vector_3d in, const ULK_matrix_4x4 m)
{
   float x = in[0];
   float y = in[1];
   float z = in[2];
   float w = m[3]*x+m[7]*y+m[11]*z+m[15];

   if(w==0.0f)
      w = 1.0f;

   out[0] = (m[0]*x+m[4]*y+m[8]*z+m[12])/w;
   out[1] = (m[1]*x+m[5]*y+m[9]*z+m[13])/w;
   out[2] = (m[2]*x+m[6]*y+m[10]*z+m[14])/w;
}

void ULK_vector_4d_transform_matrix_4x4(ULK_vector_4d out, const ULK_vector_4d in, const ULK_matrix_4x4 m)
{
   float x = in[0];
   float y = in[1];
   float z = in[2];
   float w = in[3];

   out[0] = m[0]*x+m[4]*y+m[8]*z+m[12]*w;
   out[1] = m[1]*x+m[5]*y+m[9]*z+m[13]*w;
   out[2] = m[2]*x+m[6]*y+m[10]*z+m[14]*w;
   out[3] = m[3]*x+m[7]*y+m[11]*z+m[15]*w;
}
